In [1]:
#Importing library and initiating API client
import pandas as pd ##Data manipulation and analysis
from googleapiclient.discovery import build ##Interacting with Google APIs
from IPython.display import JSON #JSON to readable format
#####Data visualization
import seaborn as sns # Statistical data visualization
import matplotlib.pyplot as plt # Plotting graphs and charts
import matplotlib.ticker as ticker # Configuring tick locations and formatting for plots
# Word cloud
import nltk # Natural language processing toolkit
from nltk.corpus import stopwords # Commonly used words that are usually filtered out in text analysis
from nltk.tokenize import word_tokenize # Tokenizing words in text
nltk.download('stopwords') # Download the list of stopwords
nltk.download('punkt') # Download the Punkt tokenizer models
from wordcloud import WordCloud # Generating word clouds from text
# Initialize the YouTube API client
API = build('youtube', 'v3', developerKey='AIzaSyBzS2VTt1-1Wm8t0AE1M42VNqopkUmtSHg') #Here AIzaSyBzS2VTt1-1Wm8t0AE1M42VNqopkUmtSHg is the API key
[nltk_data] Downloading package stopwords to [nltk_data] C:\Users\singh\AppData\Roaming\nltk_data... [nltk_data] Package stopwords is already up-to-date! [nltk_data] Downloading package punkt to [nltk_data] C:\Users\singh\AppData\Roaming\nltk_data... [nltk_data] Package punkt is already up-to-date!
In [2]:
#Fetcing data
datarequested = API.search().list( #fetching video data
q='health and coffee',
part='snippet',
maxResults=50,
type='video',
order='relevance'
)
dataasked = datarequested.execute()
youtubevideos = [
{
'title': item['snippet']['title'],
'channelTitle': item['snippet']['channelTitle'], # Geeting video information
'videoId': item['id']['videoId'],
'videoUrl': f"https://www.youtube.com/watch?v={item['id']['videoId']}"
}
for item in dataasked['items']
]
videodetails = [video['videoId'] for video in youtubevideos]
def get_stats(API, videodetails): #get stats
videodescription = []
for i in range(0, len(videodetails), 50):
data_asked = API.videos().list(
part="snippet,statistics",
id=','.join(videodetails[i:i+50])
).execute()
for video in data_asked['items']:
details = {
'channelTitle': video['snippet']['channelTitle'],
'title': video['snippet']['title'],
'tags': video['snippet'].get('tags', []),
'viewCount': int(video['statistics'].get('viewCount', 0)),
'likeCount': int(video['statistics'].get('likeCount', 0)),
'commentCount': int(video['statistics'].get('commentCount', 0))
}
details['tagCount'] = len(details['tags']) if details['tags'] else 0
videodescription.append(details)
dataframe = pd.DataFrame(videodescription)
return dataframe
In [3]:
# Get video statistics
videostatistics = get_stats(API, videodetails)
videostatistics
Out[3]:
| channelTitle | title | tags | viewCount | likeCount | commentCount | tagCount | |
|---|---|---|---|---|---|---|---|
| 0 | Click On Detroit | Local 4 | WDIV | How drinking coffee impacts your health | [Coffee, Health, Research, Science, Wellness W... | 55727 | 396 | 44 | 5 |
| 1 | Mayo Clinic | Mayo Clinic Minute: Health Benefits of Coffee | [Mayo Clinic (Hospital), Health Care (Issue), ... | 187930 | 1734 | 0 | 6 |
| 2 | Good Morning America | New study shows the health benefits of coffee | [Ashton, Dr., Jen, benefits, coffee, health, n... | 55415 | 630 | 68 | 10 |
| 3 | WXYZ-TV Detroit | Channel 7 | Healthiest Types of Coffee with Dr. Oz | [Detroit, Local News, distributable] | 60491 | 612 | 56 | 3 |
| 4 | Doctor Mike | The Ugly Truth About Coffee’s Effects On Your ... | [doctor mike, dr mike, drmike, dr. mike, mikha... | 4829479 | 153737 | 8582 | 35 |
| 5 | Doctor Mike Hansen | Why Coffee Is Killing You...Slowly | [Why Coffee Is Killing You, acrylamide in coff... | 256735 | 6319 | 1450 | 11 |
| 6 | ZOE | Is Coffee Healthy? | James Hoffmann and Profes... | [JIMSEVEN] | 951104 | 13962 | 1183 | 1 |
| 7 | Dr. Eric Berg DC | 8 Unexpected Benefits of COFFEE You've Never H... | [9 unexpected benefits of coffee, benefits of ... | 840822 | 26569 | 1923 | 25 |
| 8 | Weight Loss Freak ! | WHY YOU SHOULD DRINK BLACK TEA EVERY DAY: 5 AM... | [Weight Loss Freak, health and wellness, healt... | 490 | 11 | 0 | 22 |
| 9 | WXYZ-TV Detroit | Channel 7 | Ask Dr. Nandi: Is decaf coffee harmful to health? | [Detroit, metro Detroit, 7 Action News, Ask Dr... | 95945 | 702 | 91 | 7 |
| 10 | TheHealthNerd | Black Coffee Benefits: 9 Proven Health Benefit... | [black coffee benefits, benefits of black coff... | 866033 | 18180 | 1440 | 19 |
| 11 | The Dr. Gundry Podcast | Is It SAFE To Drink Coffee Everyday? (Shocking... | [dr gundry, dr. gundry, steven gundry, gundry ... | 148362 | 3576 | 415 | 17 |
| 12 | Health | 6 Ways Coffee Benefits Your Health | #DeepDive... | [health, coffee, coffee health problems, coffe... | 187833 | 2687 | 74 | 24 |
| 13 | The Yoga Institute | Coffee - Good or Bad? | Dr. Hansaji Yogendra | [yogainstitutemumbai, firstyogaschoolintheworl... | 53004 | 1153 | 20 | 36 |
| 14 | Dr. Eric Berg DC | Drink COFFEE for a Fatty Liver and Gallstones | [drink this for a fatty liver and gallstones, ... | 624126 | 20119 | 1892 | 25 |
| 15 | Dr. William Li | Scientific Study about the Health Benefits of ... | [] | 32596 | 1239 | 68 | 0 |
| 16 | JJ Medicine | Coffee: Health Benefits (ex. Reduction of Canc... | [Coffee, Coffee health, Coffee cancer, Coffee ... | 57082 | 1003 | 37 | 18 |
| 17 | Dr. Livingood | The health benefits of coffee. | [The 9 Benefits of Coffee, coffee benefits, co... | 19505 | 663 | 85 | 11 |
| 18 | Thomas DeLauer | 30 Days of NO CAFFEINE has Surprising Effects | [quit caffeine, 30 days no caffeine, 30 days n... | 729207 | 13185 | 1546 | 19 |
| 19 | Healthline | Decaf Coffee: Healthy or Unhealthy? | [coffee, dietitian, decaf, nutrition, nutritio... | 200996 | 3739 | 227 | 14 |
| 20 | HealthNormal | 11 Facts About Coffee You Had No Idea About! | [Coffee Benefits, Coffee, Coffee Health Benefi... | 3030343 | 56207 | 1453 | 16 |
| 21 | motivationaldoc | What Happens to Honey in Hot Teas or Coffee! ... | [] | 531166 | 28027 | 581 | 0 |
| 22 | WDTNTV | Lemon coffee trend: Health experts caution sid... | [2newsvideo, video] | 26318 | 57 | 4 | 2 |
| 23 | motivationaldoc | What Coffee Does to the Heart, Brain, & Body -... | [coffee, caffeine, stimulant, high blood press... | 6288328 | 105246 | 7933 | 39 |
| 24 | KenDBerryMD | Is COFFEE bad for you? The (Truth about Coffee... | [is coffee bad for you, dr berry coffee, is co... | 449548 | 26119 | 2592 | 18 |
| 25 | BRIGHT SIDE | 7 Facts About Coffee You Probably Didn’t Know | [health care, coffee, coffee benefits, facts a... | 13596855 | 166939 | 5734 | 18 |
| 26 | WPTV News - FL Palm Beaches and Treasure Coast | 'Mushroom coffee' gaining popularity for healt... | [mushroom coffee, health benefits of mushroom ... | 35503 | 278 | 29 | 5 |
| 27 | motivationaldoc | The Coffee Cortisol Connection...1 Thing Not T... | [coffee, caffeine, stimulant, high blood press... | 555836 | 26267 | 1543 | 39 |
| 28 | KenDBerryMD | What’s in Dr Berry’s Coffee? | [coffee time, coffee, cafe] | 593928 | 27002 | 1642 | 3 |
| 29 | Dr. Carlos | THE TRUTH ABOUT COFFEE, THIS IS WHAT COFFEE AC... | [benefits of caffeine, benefits of coffee, blo... | 807019 | 10285 | 644 | 19 |
| 30 | University of California Television (UCTV) | Tea or Coffee? | [Health, Diet, Wellness, Holistic Health, Inte... | 34654 | 517 | 23 | 7 |
| 31 | AsapSCIENCE | Your Brain On Coffee | [Science, AsapSCIENCE, Coffee, Caffeine, Brain... | 6442507 | 82144 | 5655 | 51 |
| 32 | motivationaldoc | The Right Time to Drink Your Coffee! Dr. Mand... | [] | 481626 | 22238 | 429 | 0 |
| 33 | Healthline | 5 Ways to Make Your Coffee Healthier | [nutrition, dietitian, make your coffee health... | 63106 | 1813 | 102 | 10 |
| 34 | Dr. Josh Axe | Is Coffee Bad for You? | Dr. Josh Axe | [dr axe, draxe, dr josh axe, josh axe, doctor ... | 1257057 | 15410 | 1409 | 14 |
| 35 | SAAOL Heart Center | Coffee - Is it Good for Health ? | By Dr. Bima... | [Coffee, Saaol, DrBimalChhajer, BimalChhajer] | 847151 | 18203 | 662 | 4 |
| 36 | Strength India Movement - Tamil / தமிழ் | காபி குடிப்பது நல்லதா? | COFFEE - Is it good o... | [Strength India Movement, Dr Ashwin Vijay, art... | 140021 | 4092 | 209 | 30 |
| 37 | The Infographics Show | What Happens To Your Body When You Stop Drinki... | [] | 1561856 | 30769 | 2787 | 0 |
| 38 | Discovery UK | Is Coffee Good For You? - How Stuff Works | [Discover UK, Discovery Channel, Discovery Plu... | 482299 | 1783 | 420 | 11 |
| 39 | We R Stupid | I ❤️ my daily Coffee/Tea - Is it Bad for Health?? | [#werstupid, #mondaymotivation, #health, #heal... | 114866 | 9077 | 161 | 37 |
| 40 | Sean Nalewanyj Shorts | STOP Drinking Coffee In The Morning 🛑 | [bodybuilding, fitness, build muscle, gym, wor... | 2857533 | 158977 | 2932 | 14 |
| 41 | Bestie Health | Having 1 Cup Of Coffee Every Day Can Do This T... | [Having 1 Cup Of Coffee Every Day Can Do This ... | 421174 | 5780 | 314 | 23 |
| 42 | AsapSCIENCE | Are You Consuming Your Coffee Correctly? | [Science, AsapSCIENCE, Coffee, Caffeine, Drink... | 3019907 | 30194 | 2095 | 37 |
| 43 | TMJ4 News | Does trendy 'Bulletproof Coffee' have any heal... | [WTMJ-TV, local news, 4p, news] | 13164 | 70 | 2 | 4 |
| 44 | Sean Hashmi MD | How does Coffee affect Kidney Disease? | [coffee and kidney disease, health, coffee ben... | 1941378 | 28526 | 1977 | 27 |
| 45 | Healthy Hamesha | Is Coffee Good For Your Health | कॉफ़ी के फायदे... | [coffee good or bad for health, coffee peene k... | 457976 | 13007 | 471 | 7 |
| 46 | Soukaina Kanice | Why You Should Quit Coffee ? - The Health Bene... | [quit coffee, health benefits of quitting caff... | 203422 | 6599 | 1233 | 17 |
| 47 | WatchMojo.com | Top 10 Surprising Health Benefits of Coffee | [coffee, coffee health benefits, coffee health... | 68124 | 1958 | 336 | 24 |
| 48 | NutritionFacts.org | Do the Health Benefits of Coffee Apply to Ever... | [benefits of coffee, black coffee benefits, ef... | 206751 | 3132 | 392 | 15 |
| 49 | The Dr. Gundry Podcast | Why You Should NEVER Have Milk With Your Coffe... | [dr gundry, dr. gundry, steven gundry, gundry ... | 275736 | 7180 | 835 | 28 |
In [4]:
# statistical analysis
In [5]:
summary = videostatistics.groupby('channelTitle').agg(
{'viewCount': 'sum', 'title': 'count', 'tagCount': 'sum'}
).rename(columns={'viewCount': 'Total Views', 'title': 'Video Count', 'tagCount': 'Total Tags'}).reset_index()
summary_channel = summary.sort_values(by='Total Views', ascending=False)
summary_channel
Out[5]:
| channelTitle | Total Views | Video Count | Total Tags | |
|---|---|---|---|---|
| 1 | BRIGHT SIDE | 13596855 | 1 | 18 |
| 0 | AsapSCIENCE | 9462414 | 2 | 88 |
| 40 | motivationaldoc | 7856956 | 4 | 78 |
| 5 | Doctor Mike | 4829479 | 1 | 35 |
| 14 | HealthNormal | 3030343 | 1 | 16 |
| 23 | Sean Nalewanyj Shorts | 2857533 | 1 | 14 |
| 22 | Sean Hashmi MD | 1941378 | 1 | 27 |
| 28 | The Infographics Show | 1561856 | 1 | 0 |
| 8 | Dr. Eric Berg DC | 1464948 | 2 | 50 |
| 9 | Dr. Josh Axe | 1257057 | 1 | 14 |
| 18 | KenDBerryMD | 1043476 | 2 | 21 |
| 39 | ZOE | 951104 | 1 | 1 |
| 30 | TheHealthNerd | 866033 | 1 | 19 |
| 21 | SAAOL Heart Center | 847151 | 1 | 4 |
| 7 | Dr. Carlos | 807019 | 1 | 19 |
| 31 | Thomas DeLauer | 729207 | 1 | 19 |
| 4 | Discovery UK | 482299 | 1 | 11 |
| 16 | Healthy Hamesha | 457976 | 1 | 7 |
| 27 | The Dr. Gundry Podcast | 424098 | 2 | 45 |
| 2 | Bestie Health | 421174 | 1 | 23 |
| 15 | Healthline | 264102 | 2 | 24 |
| 6 | Doctor Mike Hansen | 256735 | 1 | 11 |
| 20 | NutritionFacts.org | 206751 | 1 | 15 |
| 24 | Soukaina Kanice | 203422 | 1 | 17 |
| 19 | Mayo Clinic | 187930 | 1 | 6 |
| 13 | Health | 187833 | 1 | 24 |
| 35 | WXYZ-TV Detroit | Channel 7 | 156436 | 2 | 10 |
| 25 | Strength India Movement - Tamil / தமிழ் | 140021 | 1 | 30 |
| 37 | We R Stupid | 114866 | 1 | 37 |
| 36 | WatchMojo.com | 68124 | 1 | 24 |
| 17 | JJ Medicine | 57082 | 1 | 18 |
| 3 | Click On Detroit | Local 4 | WDIV | 55727 | 1 | 5 |
| 12 | Good Morning America | 55415 | 1 | 10 |
| 29 | The Yoga Institute | 53004 | 1 | 36 |
| 34 | WPTV News - FL Palm Beaches and Treasure Coast | 35503 | 1 | 5 |
| 32 | University of California Television (UCTV) | 34654 | 1 | 7 |
| 11 | Dr. William Li | 32596 | 1 | 0 |
| 33 | WDTNTV | 26318 | 1 | 2 |
| 10 | Dr. Livingood | 19505 | 1 | 11 |
| 26 | TMJ4 News | 13164 | 1 | 4 |
| 38 | Weight Loss Freak ! | 490 | 1 | 22 |
In [6]:
#Get data as csv files
summary_channel.to_csv('summary_channel.csv', index=False)
videostatistics.to_csv('videostatistics.csv', index=False)
In [7]:
#Data analysis
# Scatter plot: Number of tags vs views & Views vs. likes and comments
plt.figure(figsize=(6, 6))
sns.scatterplot(data=videostatistics, x="tagCount", y="viewCount")
plt.title('Plot stating no. of tags vs view count')
plt.xlabel('No. of Tags')
plt.ylabel('Total No. of views')
plt.show()
fig, ax = plt.subplots(1, 2, figsize=(12, 6))
sns.scatterplot(data=videostatistics, x='tagCount', y='likeCount', ax=ax[0])
ax[0].set_title('Plot stating no. of tags vs like count')
ax[0].set_xlabel('No. of Tags')
ax[0].set_ylabel('Total No. of likes')
sns.scatterplot(data=videostatistics, x='tagCount', y='commentCount', ax=ax[1])
ax[1].set_title('Plot stating no. of tags vs comment count')
ax[1].set_xlabel('No. of Tags')
ax[1].set_ylabel('No. of comments')
plt.tight_layout()
plt.show()
In [8]:
# Hastags associated with top performing videos
videostatistics['tags_str'] = videostatistics['tags'].apply(lambda x: ', '.join(x) if x else '')
top_videos = videostatistics.sort_values('viewCount', ascending=False).head(50)
plt.figure(figsize=(12, 8))
ax = sns.barplot(x='tags_str', y='viewCount', data=top_videos)
ax.set_xticklabels(ax.get_xticklabels(), rotation=90, ha='right')
ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: '{:,.0f}K'.format(x / 1000)))
ax.set_title('Plot representing Top Performing Videos by View Count')
ax.set_xlabel('Hashtags Used in Videos')
ax.set_ylabel('Number of Views')
plt.tight_layout()
plt.show()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:6: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.
ax.set_xticklabels(ax.get_xticklabels(), rotation=90, ha='right')
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2953 (\N{TAMIL LETTER U}) missing from font(s) DejaVu Sans.
plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Matplotlib currently does not support Tamil natively.
plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2975 (\N{TAMIL LETTER TTA}) missing from font(s) DejaVu Sans.
plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2993 (\N{TAMIL LETTER RRA}) missing from font(s) DejaVu Sans.
plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 3021 (\N{TAMIL SIGN VIRAMA}) missing from font(s) DejaVu Sans.
plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2986 (\N{TAMIL LETTER PA}) missing from font(s) DejaVu Sans.
plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2991 (\N{TAMIL LETTER YA}) missing from font(s) DejaVu Sans.
plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 3007 (\N{TAMIL VOWEL SIGN I}) missing from font(s) DejaVu Sans.
plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2970 (\N{TAMIL LETTER CA}) missing from font(s) DejaVu Sans.
plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2990 (\N{TAMIL LETTER MA}) missing from font(s) DejaVu Sans.
plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2992 (\N{TAMIL LETTER RA}) missing from font(s) DejaVu Sans.
plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 3009 (\N{TAMIL VOWEL SIGN U}) missing from font(s) DejaVu Sans.
plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2980 (\N{TAMIL LETTER TA}) missing from font(s) DejaVu Sans.
plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2997 (\N{TAMIL LETTER VA}) missing from font(s) DejaVu Sans.
plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2950 (\N{TAMIL LETTER AA}) missing from font(s) DejaVu Sans.
plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2994 (\N{TAMIL LETTER LA}) missing from font(s) DejaVu Sans.
plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 3019 (\N{TAMIL VOWEL SIGN OO}) missing from font(s) DejaVu Sans.
plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2985 (\N{TAMIL LETTER NNNA}) missing from font(s) DejaVu Sans.
plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 3016 (\N{TAMIL VOWEL SIGN AI}) missing from font(s) DejaVu Sans.
plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2965 (\N{TAMIL LETTER KA}) missing from font(s) DejaVu Sans.
plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Glyph 2995 (\N{TAMIL LETTER LLA}) missing from font(s) DejaVu Sans.
plt.tight_layout()
C:\Users\singh\AppData\Local\Temp\ipykernel_5544\3898361022.py:11: UserWarning: Tight layout not applied. The bottom and top margins cannot be made large enough to accommodate all Axes decorations.
plt.tight_layout()
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2953 (\N{TAMIL LETTER U}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Matplotlib currently does not support Tamil natively.
fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2975 (\N{TAMIL LETTER TTA}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2993 (\N{TAMIL LETTER RRA}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 3021 (\N{TAMIL SIGN VIRAMA}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2986 (\N{TAMIL LETTER PA}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2991 (\N{TAMIL LETTER YA}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 3007 (\N{TAMIL VOWEL SIGN I}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2970 (\N{TAMIL LETTER CA}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2990 (\N{TAMIL LETTER MA}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2992 (\N{TAMIL LETTER RA}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 3009 (\N{TAMIL VOWEL SIGN U}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2980 (\N{TAMIL LETTER TA}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2997 (\N{TAMIL LETTER VA}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2950 (\N{TAMIL LETTER AA}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2994 (\N{TAMIL LETTER LA}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 3019 (\N{TAMIL VOWEL SIGN OO}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2985 (\N{TAMIL LETTER NNNA}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 3016 (\N{TAMIL VOWEL SIGN AI}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2965 (\N{TAMIL LETTER KA}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
C:\Users\singh\AppData\Local\Programs\Python\Python312\Lib\site-packages\IPython\core\pylabtools.py:170: UserWarning: Glyph 2995 (\N{TAMIL LETTER LLA}) missing from font(s) DejaVu Sans.
fig.canvas.print_figure(bytes_io, **kw)
In [9]:
# Word cloud
stop_words = set(stopwords.words('english'))
videostatistics['tags_no_stopwords'] = videostatistics['tags'].apply(
lambda x: [item for item in x if item.lower() not in stop_words] if x else []
)
word_cloud = [word for tags in videostatistics['tags_no_stopwords'] for word in tags]
wordcloudstr = ' '.join(word_cloud)
def plot_cloud(wordcloud):
plt.figure(figsize=(30, 20))
plt.imshow(wordcloud)
plt.axis("off")
plt.show()
wordcloud = WordCloud(
width=2000, height=1000, random_state=1, background_color='black',
colormap='cividis', collocations=False
).generate(wordcloudstr)
plot_cloud(wordcloud)
#for top 50 videos
data = videostatistics.sort_values(by='viewCount', ascending=False).head(50)
stop_words = set(stopwords.words('english'))
data['tags_no_stopwords'] = data['tags'].apply(
lambda x: [item for item in x if item.lower() not in stop_words] if x else []
)
word_cloud = [word for tags in data['tags_no_stopwords'] for word in tags]
wordcloudstr = ' '.join(word_cloud)
def plot_cloud(wordcloud):
plt.figure(figsize=(30, 20))
plt.imshow(wordcloud)
plt.axis("off")
plt.show()
wordcloud = WordCloud(
width=2000, height=1000, random_state=1, background_color='black',
colormap='cividis', collocations=False
).generate(wordcloudstr)
plot_cloud(wordcloud)
#Applying filters
cofe = ["coffee", "espresso", "latte", "cappuccino", "americano", "mocha", "barista", "brewing", "roasting"]
health = ["health", "healthy", "nutrition", "wellness", "fitness", "lifestyle", "diet", "exercise", "well-being"]
stop_words = set(stopwords.words('english')) #combine all the words
stop_words.update(cofe)
stop_words.update(health)
videostatistics['tags_no_stopwords'] = videostatistics['tags'].apply(
lambda x: [item for item in x if item.lower() not in stop_words] if x else []
)
word_cloud = [word for tags in videostatistics['tags_no_stopwords'] for word in tags]
wordcloudstr = ' '.join(word_cloud)
def plot_cloud(wordcloud):
plt.figure(figsize=(30, 20))
plt.imshow(wordcloud)
plt.axis("off")
plt.show()
wordcloud = WordCloud(
width=2000, height=1000, random_state=1, background_color='black',
colormap='cividis', collocations=False
).generate(wordcloudstr)
plot_cloud(wordcloud)